* Table A1c: Baseline summary statistics and balance tests for attritors
* Sample used to study investments and agricultural outcomes
* Last updated: Niriksha Shetty, 03/31/2016

* ---------------------------------------------------------------------------
* Session setup: start from an empty session, then load the master dataset.
* ---------------------------------------------------------------------------
* Drop data, Mata objects and matrices left over from earlier work.
* The data must be cleared before set maxvar is issued.
clear
clear mata
clear matrix
set maxvar 20000
set more off

* Echo the working directory to the log; every relative path below
* (../dta, ../output) is resolved against it.
pwd
* Fixed: the cd argument used to be c() wrapped around an undefined local
* macro named pwd, which expanded to an invalid path whose error was hidden
* by capture. The c-class value c(pwd) is the intended reference.
cap cd "`c(pwd)'"

* Remove any stored estimation results before the tables are built.
estimates clear

use "../dta/reduced-form-master.dta", clear

* Sample restrictions, applied in a single pass. An observation is removed
* when any of these flags equals 1:
*   firstt_2009     - treatment expansion
*   add_sample_w2   - temporarily added into the sample
*   add_sample_w5   - temporarily added into the sample
* Missing flags never equal 1, so those observations are kept.
drop if inlist(1, firstt_2009, add_sample_w2, add_sample_w5)

* marketing treatments
local uniqinst "assigned_risk_ws_2008 discount_2008 rebate_50percentoff rebate2_1free rebate3_1free flyer_hyv bdmperc_2009 fourbdmperc_2009 disc4game_2009 bdmperc_2010 fourbdmperc_2010 disc4game_2010 assigned_risk_ws_2010 assigned_video_test assigned_drought_flyer assigned_subsidies_flyer assigned_loan bdmperc_2011 fourbdmperc_2011 disc4game_2011 bdmperc_2012 fourbdmperc_2012 disc4game_2012 bdmperc_2013 fourbdmperc_2013 disc4game_2013"  /* sewaT peerT assigned_risk_ws_2011 assigned_risk_ws_2012 assigned_risk_ws_2013 mrkt_allnegative mrkt_poslang mrkt_posimg*/

* Set each marketing-treatment variable to 0 when it is system-missing,
* when wave is 0, or when treat_year is 0. One replace with an OR-ed
* condition yields the same final values as applying the three rules in turn.
foreach mkt of varlist `uniqinst' {
	replace `mkt' = 0 if `mkt'==. | wave==0 | treat_year==0
}

* Attrition indicator.
* EA_A is 1 for observations outside the balanced panel (bal_panel3 == 0)
* and 0 otherwise, including when bal_panel3 is missing. Per the original
* notes, being outside the balanced panel means the household did not
* participate in a survey and/or reported missing ag revenue/costs.
gen EA_A = (bal_panel3==0)

* ever_attrit is the maximum of EA_A within id, so it equals 1 on every row
* of an id that has EA_A == 1 on at least one row.
* NOTE(review): ever_attrit is not referenced anywhere else in this file;
* the tables below condition on EA_A. Confirm which flag is intended.
bysort id: egen ever_attrit = max(EA_A)

* Sum stats by treatment group
* Means and standard deviations of the variables in vars, computed on
* attritor observations (EA_A == 1) with mkt_year == 2005. One set of results
* is stored per column of the table:
*   A = all such observations       D = Tvillage == 0
*   B = originaltvillage == 1       C = firstt_2007 == 1
local vars "kval_w1_real cost_k_w1_real b110rk_w1_real b111rk_w1_real b112pk_w1_real total_mandays_k_w1 total_hired_mandays_k_w1 total_fam_mandays_k_w1 profit_k_w1_real fraction_hy_k fraction_cashcrop_k plotsize_ha totalrevenues_w1_real totalcosts_w1_real totalprofit_w1_real"

* Condition shared by all four columns.
local base "mkt_year==2005 & EA_A==1"

qui estpost summarize `vars' if `base'
est sto A
qui estpost summarize `vars' if originaltvillage==1 & `base'
est sto B
qui estpost summarize `vars' if firstt_2007==1 & `base'
est sto C
qui estpost summarize `vars' if Tvillage==0 & `base'
est sto D

* Columns are written in the order A D B C to match the titles
* All, C, T1, T2.
* Fixed: a stray replace token sat inside cells(), where it is read as the
* name of a cell statistic and not as the file option. cells() now lists only
* mean and sd; the real replace option stays at the end of the command.
esttab A D B C using "../output/sum-a1c.csv", cells(mean(fmt(2)) sd(par fmt(2))) nolabel noabbrev title("Summary statistics") collabels("Mean") mtitle("All" "C" "T1" "T2") nonum replace


* baseline balance test
* For each variable in vars, regress it on the two treatment-group dummies
* (originaltvillage, firstt_2007) among attritors with mkt_year == 2005,
* clustering standard errors by villageno. The p-value of the regression
* F test is appended as one row of bal-a1c.csv.

* Fixed: every iteration appends, so re-running this do-file used to pile
* duplicate rows onto the previous run's output. Start from a fresh file;
* capture keeps a first run, when no file exists yet, from stopping here.
capture erase "../output/bal-a1c.csv"

* Holder variable: the p-value is stored as a constant so that estpost
* summarize can report it as a mean, labelled with the outcome's name.
tempvar tmp
gen `tmp' = .
foreach var of local vars {
	reg `var' originaltvillage firstt_2007 if mkt_year == 2005 & EA_A==1, cluster(villageno)
	* Upper-tail probability of the model F statistic = p-value of the test.
	replace `tmp' = Ftail(e(df_m),e(df_r),e(F))
	label var `tmp' "`var'"
	* The holder is constant across rows, so its mean is the p-value itself.
	estpost summarize `tmp' if mkt_year == 2005
	est sto A
	replace `tmp' = .
	esttab A using "../output/bal-a1c.csv", cells("mean(fmt(2))") label noabbrev collabels("p-value") mtitle("") noobs nonum append
	* Drop all stored results so the next iteration starts clean.
	estimates clear
}
